* Project root directory, taken from the first positional argument
global root_dir = "`1'"

* Load the project configuration
* NOTE(review): presumably this defines log_dir, dataset_dir and commondata_dir - confirm
include "$root_dir/code/config/config.do"


* Close a log named dat that an earlier aborted run may have left open;
* otherwise the log using command below fails and the run is not logged
cap log close dat
* The path is quoted so that a log directory containing spaces still works
cap noi log using "${log_dir}/fields.log", replace name(dat)

* Positional arguments 2 to 5 may carry the placeholder ___EMPTY___, which
* stands for "no value". Map them onto the globals arg1 to arg4, turning
* the placeholder into an empty string.
forvalues k = 1/4 {
    local pos = `k' + 1
    global arg`k' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* Copy each non-empty argument into its named global; the first three are
* echoed together with a label
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* wtype is displayed unconditionally, whatever it currently holds
display "${wtype}"
capture noi {

* fields.do
* This do-file defines the c/ipc codes considered machinery fields
* All file paths are quoted so that directories containing spaces work

* Attach the technology field of each code: the first four characters of
* cipc6 are temporarily renamed to ipc_maingroup_symbol for the merge and
* renamed back to cipc4 afterwards
use "${dataset_dir}/patstat_orbis/docdb_family_id_cipc_codes.dta", clear
gen cipc4 = substr(cipc6,1,4)
ren cipc4 ipc_maingroup_symbol
* NOTE(review): ipc looks like an abbreviation of ipc_maingroup_symbol - confirm
mmerge ipc using "${commondata_dir}/patstat_2018b/ipc_techn_field.dta", unmatched(none)
ren ipc_maingroup_symbol cipc4
drop _merge

* we keep descriptions below even for the fields we don't use, to avoid dropping duplicates that were previously not dropped
gen cipc3 = substr(cipc4,1,3)
* codes in classes F41 and F42 are excluded from tfa and tfa2 below
gen cipc3_excl = inlist(cipc3,"F41","F42")
* individual codes that are added on top of the technology fields
gen cipc4_add = inlist(cipc4,"B42C","B07C")
gen cipc6_add = inlist(cipc6,"G05B19", "G05B2219", "B62D65")
gen cipc6_add_2 = inlist(cipc6,"G05B19", "G05B2219")
* code-level indicators: tfp pharmaceuticals, tfc chemistry, tfa machinery
gen tfp = techn_field == "Pharmaceuticals"
gen tfc = inlist(techn_field,"Organic fine chemistry","Macromolecular chemistry, polymers")
gen tfa = inlist(techn_field,"Handling","Machine tools","Textile and paper machines","Other special machines")
* tfa2 starts from the same fields as tfa but only adds the two G05B codes
gen tfa2 = tfa
replace tfa = (cipc3_excl == 0) & (tfa | cipc4_add | cipc6_add)
replace tfa2 = (cipc3_excl == 0) & (tfa2 | cipc6_add_2)
* a family gets an indicator if at least one of its codes has it
foreach xxx in p c a a2 {
	bys docdb_family_id : egen is`xxx' = max(tf`xxx')
}

*isa is our machinery indicator
keep docdb_family_id is*
duplicates drop

* bring in the appln_id values belonging to each family
mmerge docdb_family_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(appln_id)
drop _merge

* NOTE(review): families without a match keep a missing appln_id here - confirm this is intended
keep if isa
keep appln_id
compress
save "${dataset_dir}/patent_list/pats_tfa.dta", replace


*merge in citations and restrict to patents having at least 1 citation.
*this gives us our machinery with at least one citation set (tfacit1), here on the appln_id level
use "${dataset_dir}/patent_list/pats_tfa.dta", clear
mmerge appln_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(nb_citing_docdb_fam)
drop _merge

* numeric missing values compare as larger than any number, so a plain
* >= 1 test would also keep rows whose citation count is missing
* (e.g. rows left unmatched by the merge above); exclude them explicitly
keep if nb_citing_docdb_fam >= 1 & !missing(nb_citing_docdb_fam)
keep appln_id
save "${dataset_dir}/patent_list/pats_tfacit1.dta", replace


*save machinery patent families (i.e. docdb level)
use "${dataset_dir}/patent_list/pats_tfa.dta", clear
mmerge appln_id using "${commondata_dir}/patstat_2018b/family_info.dta", unmatched(master) ukeep(docdb_family_id)
drop _merge
keep docdb_family_id
duplicates drop
save "${dataset_dir}/patent_list/docdb_pats_tfa.dta", replace

}
* _rc holds the return code of the captured block directly above;
* anything other than zero means a command inside it failed
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the log named dat; capture keeps this silent if no such log is open
capture log close dat